## [1] "/Users/BinjieLai/Documents/udacity/R/EDA_Course_Materials/Project 3 Binjie_lai_update"
## [1] "Date" "Year" "Month" "Day"
## [5] "PM2.5" "PM10" "CO" "SO2"
## [9] "NO2" "X1hO3" "X8hO3" "WD"
## [13] "WS" "Temp" "RH" "sea.level.pres"
## [17] "X6hr.precip" "dewpoint" "visibility" "City"
## [21] "lon" "lat"
## Date Year Month Day
## 1/1/14 : 3 Min. :2013 Min. : 1.000 Min. : 1.00
## 1/10/14: 3 1st Qu.:2013 1st Qu.: 4.000 1st Qu.: 8.00
## 1/11/14: 3 Median :2013 Median : 7.000 Median :16.00
## 1/12/14: 3 Mean :2013 Mean : 6.526 Mean :15.72
## 1/13/14: 3 3rd Qu.:2013 3rd Qu.:10.000 3rd Qu.:23.00
## 1/14/14: 3 Max. :2014 Max. :12.000 Max. :31.00
## (Other):1077
## PM2.5 PM10 CO SO2
## Min. : 5.42 Min. : 14.65 :143 Min. : 0.7249
## 1st Qu.: 30.55 1st Qu.: 50.21 686 : 5 1st Qu.: 4.1049
## Median : 50.69 Median : 74.89 743 : 5 Median : 6.0504
## Mean : 65.22 Mean : 87.08 454 : 4 Mean : 7.6950
## 3rd Qu.: 83.00 3rd Qu.:108.70 498 : 4 3rd Qu.: 9.3388
## Max. :391.63 Max. :408.78 505 : 4 Max. :45.6772
## NA's :94 NA's :152 (Other):930 NA's :118
## NO2 X1hO3 X8hO3 WD
## Min. : 3.256 Min. : 4.285 Min. : 2.738 Min. : 0.3189
## 1st Qu.:16.033 1st Qu.: 34.931 1st Qu.: 28.675 1st Qu.: 56.3967
## Median :20.881 Median : 50.094 Median : 42.684 Median :143.7384
## Mean :23.321 Mean : 55.672 Mean : 46.078 Mean :158.4243
## 3rd Qu.:28.883 3rd Qu.: 71.025 3rd Qu.: 58.548 3rd Qu.:249.3315
## Max. :66.551 Max. :149.858 Max. :134.037 Max. :360.0000
## NA's :119 NA's :159 NA's :159 NA's :13
## WS Temp RH sea.level.pres
## Min. :0.0156 Min. :-5.450 Min. :11.60 Min. : 11.00
## 1st Qu.:0.9601 1st Qu.: 9.255 1st Qu.:54.77 1st Qu.: 96.62
## Median :1.5978 Median :18.937 Median :69.29 Median : 176.17
## Mean :1.7865 Mean :17.371 Mean :66.39 Mean : 248.21
## 3rd Qu.:2.4521 3rd Qu.:25.725 3rd Qu.:79.71 3rd Qu.: 239.69
## Max. :5.8587 Max. :35.367 Max. :98.35 Max. :25795.38
## NA's :13 NA's :13 NA's :13 NA's :13
## X6hr.precip dewpoint visibility City
## Min. : 0.000 Min. :-28.375 Min. : 0.625 Beijing :365
## 1st Qu.: 0.000 1st Qu.: 1.121 1st Qu.: 9.565 Guangzhou:365
## Median : 0.000 Median : 12.314 Median :15.000 Shanghai :365
## Mean : 1.146 Mean : 10.040 Mean :15.632
## 3rd Qu.: 0.300 3rd Qu.: 20.970 3rd Qu.:21.400
## Max. :40.000 Max. : 26.648 Max. :30.000
## NA's :13 NA's :13 NA's :13
## lon lat
## Min. :23.12 Min. :113.3
## 1st Qu.:23.12 1st Qu.:113.3
## Median :31.25 Median :116.4
## Mean :31.43 Mean :117.1
## 3rd Qu.:39.91 3rd Qu.:121.5
## Max. :39.91 Max. :121.5
##
=============================================================================
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.625 9.565 15.000 15.630 21.400 30.000 13
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 5.42 30.55 50.69 65.22 83.00 391.60 94
=============================================================================
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## Warning: Removed 13 rows containing missing values (stat_smooth).
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
## Warning: Removed 94 rows containing missing values (stat_smooth).
## Warning: Removed 4 rows containing missing values (geom_path).
## Warning: Removed 107 rows containing missing values (geom_point).
##
## Pearson's product-moment correlation
##
## data: PM2.5 and visibility
## t = -27.3105, df = 986, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.6903674 -0.6192317
## sample estimates:
## cor
## -0.6562553
## Warning: Removed 94 rows containing non-finite values (stat_boxplot).
## Warning: Removed 123 rows containing non-finite values (stat_boxplot).
## Warning: Removed 57 rows containing missing values (geom_point).
##
## Pearson's product-moment correlation
##
## data: PM10 and visibility
## t = -19.284, df = 929, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.5790216 -0.4871461
## sample estimates:
## cor
## -0.5346619
##
## Pearson's product-moment correlation
##
## data: RH and visibility
## t = -12.3324, df = 1080, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.4025073 -0.2979811
## sample estimates:
## cor
## -0.3513385
##
## Pearson's product-moment correlation
##
## data: WS and visibility
## t = 7.1959, df = 1080, p-value = 1.159e-12
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.1562920 0.2700506
## sample estimates:
## cor
## 0.2138964
##
## Pearson's product-moment correlation
##
## data: Temp and visibility
## t = 9.3492, df = 1080, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.2175805 0.3278789
## sample estimates:
## cor
## 0.273629
## Warning: Removed 107 rows containing missing values (geom_point).
##
## Pearson's product-moment correlation
##
## data: WS and PM2.5
## t = -8.0374, df = 986, p-value = 2.61e-15
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.3056120 -0.1885168
## sample estimates:
## cor
## -0.2479699
=============================================================================
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## Warning: Removed 7 rows containing missing values (stat_smooth).
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## Warning: Removed 3 rows containing missing values (stat_smooth).
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## Warning: Removed 3 rows containing missing values (stat_smooth).
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## Warning: Removed 23 rows containing missing values (stat_smooth).
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## Warning: Removed 45 rows containing missing values (stat_smooth).
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
## Warning: Removed 26 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 30 rows containing missing values (geom_point).
## Warning: Removed 48 rows containing missing values (geom_point).
## Warning: Removed 29 rows containing missing values (geom_point).
## Warning: Removed 7 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## Warning: Removed 3 rows containing missing values (geom_point).
## [1] 0.3247127
## [1] 0.08935503
## [1] 0.2317084
============================================================================
=============================================================================
=============================================================================
## Warning: Removed 94 rows containing missing values (geom_point).
## Warning: Removed 23 rows containing missing values (geom_point).
## Warning: Removed 45 rows containing missing values (geom_point).
## Warning: Removed 26 rows containing missing values (geom_point).
##
## Pearson's product-moment correlation
##
## data: PM2.5 and visibility
## t = -27.3105, df = 986, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.6903674 -0.6192317
## sample estimates:
## cor
## -0.6562553
##
## Pearson's product-moment correlation
##
## data: PM2.5 and log(visibility)
## t = -37.4783, df = 986, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7910712 -0.7395061
## sample estimates:
## cor
## -0.7665213
## Warning: Removed 22 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 25 rows containing non-finite values (stat_boxplot).
## Warning: Removed 9 rows containing non-finite values (stat_boxplot).
## Warning: Removed 15 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).